package com.guchenbo.spark.core

/**
 * Small demo of `groupBy` on RDDs.
 *
 * Reads `input/access.log`, groups its lines by a key cut out of each line,
 * and prints every key with the number of lines that share it, most frequent
 * key first.
 *
 * @author guchenbo
 * @date 2022/3/5
 */
object RddGroupBy {

  /**
   * Entry point of the demo.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val sc = SparkUtils.sc("map")
    try {
      val rdd = sc.makeRDD(List(1, 2, 3, 4))
      // Groups the numbers by their remainder modulo 2. No action is invoked on
      // r1 (the collect below is commented out), so this only declares the
      // transformation.
      val r1 = rdd.groupBy(i => i % 2)
      //    r1.collect().foreach(println)

      val lines = sc.textFile("input/access.log")
      lines
        .groupBy { line =>
          // The key is characters 16-17 of the 4th space-separated field.
          // NOTE(review): presumably a component of the request timestamp --
          // confirm against the actual log format. A line with fewer than four
          // fields, or a 4th field shorter than 18 characters, throws here.
          line.split(" ")(3).substring(16, 18)
        }
        // Shrink each group to its size before sorting and collecting, so only
        // (key, count) pairs travel to the driver instead of every log line.
        .map { case (key, grouped) => (key, grouped.size) }
        .sortBy(_._2, ascending = false)
        .collect()
        // Pass an explicit tuple to println; the output stays "(key,count)".
        .foreach(keyAndCount => println(keyAndCount))
    } finally {
      // Release the Spark context even when the job fails.
      sc.stop()
    }
  }
}
